package mylibrary.spider.htmlParser;

import java.io.ByteArrayOutputStream;
import java.nio.charset.Charset;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.visitors.NodeVisitor;

public class Test {

	public static void main(String[] args) throws Exception {
		
		HttpGet request = new HttpGet("http://www.12345.suzhou.gov.cn/");
		HttpResponse response = new DefaultHttpClient().execute(request);
		int statusCode = response.getStatusLine().getStatusCode();
		if(statusCode == 200) {
			ByteArrayOutputStream bos = new ByteArrayOutputStream();
			response.getEntity().writeTo(bos);
			bos.close();
			String content = bos.toString();
			
			//Entry entry = new Entry();
			
			NodeVisitor linkVisitor = new NodeVisitor(){
				public void visitTag(Tag tag) {
					String name = tag.getTagName();
					if("a".equalsIgnoreCase(name)) {
						String hrefValue = tag.getAttribute("href");
						System.out.println(hrefValue);
						if(hrefValue!=null && hrefValue.startsWith("/")) {
							
						}
					}
				}
			};
			
			Parser parser = new Parser(content);
			parser.visitAllNodesWith(linkVisitor);
		}
		
		
	}
}
